Set working directory
# Ana's WD
# setwd("C:/Users/Ana-Maria's Laptop/Documents/GitHub/infoVisualization")
# Andrei's WD
# setwd("C:/Users/andre/Documents/infoVisualization/")
Imports - libraries and read dataset
#install.packages("png")
#install.packages("gifsky")
#install.packages("gganimate")
#install.packages("gapminder")
#install.packages("ggmap")
#install.packages("esquisse")
#install.packages("ggiraph")
#install.packages("gdtools")
#install.packages("leaflet") # interactive maps
#install.packages("plotly")
#install.packages("tseries")
#install.packages("astsa")
#install.packages("sqldf")
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
library(gganimate)
## Warning: package 'gganimate' was built under R version 4.0.5
library(gapminder)
## Warning: package 'gapminder' was built under R version 4.0.5
#library(tidyverse)
library(ggmap)
## Warning: package 'ggmap' was built under R version 4.0.5
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(ggiraph)
## Warning: package 'ggiraph' was built under R version 4.0.5
library(esquisse)
## Warning: package 'esquisse' was built under R version 4.0.5
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.0.5
library(plotly)
## Warning: package 'plotly' was built under R version 4.0.5
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
##
## wind
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# times series libraries
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tseries)
## Warning: package 'tseries' was built under R version 4.0.5
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(astsa)
library(sqldf)
## Warning: package 'sqldf' was built under R version 4.0.5
## Loading required package: gsubfn
## Warning: package 'gsubfn' was built under R version 4.0.5
## Loading required package: proto
## Warning: package 'proto' was built under R version 4.0.5
## Loading required package: RSQLite
## Warning: package 'RSQLite' was built under R version 4.0.5
# esquisser(df)
earthquakes_csv = "eartquakes_Romania.csv"
df <- read.csv(file = earthquakes_csv)
# convert time column to R timestap
df$time <- gsub('T', ' ', df$time)
df$time <- gsub('Z', '', df$time)
df$time <- as.POSIXct(df$time, format="%Y-%m-%d %H:%M:%OS", tz=Sys.timezone())
#split timestamp into date and time
df$timeYear <- as.Date(df$time)
df$timeDay <- format(df$time, "%H:%M:%OS")
df$year <- format(df$time, "%Y")
df$month <- format(df$time, "%m")
df$yearInt <- as.integer(df$year)
# see data types
str(df)
## 'data.frame': 665 obs. of 27 variables:
## $ time : POSIXct, format: "2017-03-08 13:43:13" "2017-02-08 15:08:20" ...
## $ latitude : num 45.7 45.5 45.7 45.7 45.9 ...
## $ longitude : num 26.5 26.3 26.7 26.5 26.8 ...
## $ depth : num 149 127 129 97 90 ...
## $ mag : num 4.1 4.7 4.4 5.6 4.1 5.6 4.2 2.9 4.3 4.3 ...
## $ magType : chr "mb" "mb" "mb" "mww" ...
## $ nst : int NA NA NA NA NA NA NA NA NA NA ...
## $ gap : num 41 19 48 14 NA 10 73 NA NA 30 ...
## $ dmin : num 0.259 0.222 0.559 0.466 NA ...
## $ rms : num 0.76 1.1 1.23 0.8 1.43 0.96 0.68 1.18 0.63 1.09 ...
## $ net : chr "us" "us" "us" "us" ...
## $ id : chr "us100087pk" "us20008ii5" "us20008ifq" "us10007n3r" ...
## $ updated : chr "2017-04-20T10:53:57.224Z" "2017-03-01T06:16:37.044Z" "2017-03-05T08:24:32.472Z" "2017-03-23T22:52:05.040Z" ...
## $ place : chr "19km N of Gura Teghii, Romania" "11km NNW of Nehoiu, Romania" "4km WNW of Nereju, Romania" "14km W of Nereju, Romania" ...
## $ type : chr "earthquake" "earthquake" "earthquake" "earthquake" ...
## $ horizontalError: num 3.2 6.6 6.4 4.3 6.7 4 3 4.2 3.8 4.9 ...
## $ depthError : num 3.9 5.4 5.4 1.8 6.4 1.8 4.2 8 5 6.6 ...
## $ magError : num 0.127 0.045 0.148 NA 0.157 NA 0.062 NA 0.265 0.111 ...
## $ magNst : int 17 152 13 NA 11 NA 25 NA 4 23 ...
## $ status : chr "reviewed" "reviewed" "reviewed" "reviewed" ...
## $ locationSource : chr "us" "us" "us" "us" ...
## $ magSource : chr "us" "us" "us" "us" ...
## $ timeYear : Date, format: "2017-03-08" "2017-02-08" ...
## $ timeDay : chr "13:43:13" "15:08:20" "09:52:06" "23:20:56" ...
## $ year : chr "2017" "2017" "2017" "2016" ...
## $ month : chr "03" "02" "02" "12" ...
## $ yearInt : int 2017 2017 2017 2016 2016 2016 2016 2016 2016 2015 ...
Describing the dataset: time - self-explanatory, includes: year, month, day, hour, minute, seconds, milliseconds latitude & longitude - the coordinates of the earthquake source depth - the depth at which it originated mag - the physical size of the earthquake Great 8 or more Major 7 - 7.9 Strong 6 - 6.9 Moderate 5 - 5.9 Light 4 - 4.9 Minor 3 - 3.9 magType - category based on magnitude range and distance range. Details: https://www.usgs.gov/natural-hazards/earthquake-hazards/science/magnitude-types?qt-science_center_objects=0#qt-science_center_objects
We start by exploring the variance in the dataset. This way we can better understand each column, besides its initial definition.
mag_hist <- ggplot(df) +
geom_histogram(mapping = aes(x = mag), fill = "steelblue", bins = 40) +
theme_minimal()
mag_hist + ggtitle("Number of recorded earthquakes by magnitude")
year_bar <- ggplot(df) +
geom_bar(mapping = aes(y = year), fill = "steelblue") +
theme_minimal()
year_bar + ggtitle("Number of earthquakes per year (1990 - 2017)")
month_bar <- ggplot(df) +
geom_bar(mapping = aes(x = month), fill = "steelblue") +
theme_minimal()
month_bar + ggtitle("Number of earthquakes by month (1990 - 2017)")
depth_bar <- ggplot(df) +
geom_histogram(mapping = aes(x = depth), fill = "steelblue") +
theme_minimal()
depth_bar + ggtitle("Number of earthquakes by depth")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
magtype_bar <- ggplot(df) +
geom_bar(mapping = aes(x = magType), fill = "steelblue") +
theme_minimal()
magtype_bar + ggtitle("Magnitude type (category)")
nst_bar <- ggplot(df) +
geom_histogram(mapping = aes(x = nst), fill = "steelblue", bins=50) +
theme_minimal()
nst_bar + ggtitle("Nst - number of seismic stations reporting")
## Warning: Removed 270 rows containing non-finite values (stat_bin).
gap_bar <- ggplot(df) +
geom_histogram(mapping = aes(x = gap), fill = "steelblue") +
theme_minimal()
gap_bar + ggtitle("Gap Value (azimuthal gap range)")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 250 rows containing non-finite values (stat_bin).
dmin_bar <- ggplot(df) +
geom_histogram(mapping = aes(x = dmin), fill = "steelblue") +
theme_minimal()
dmin_bar + ggtitle("Dmin Value (distance from epicenter)")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 620 rows containing non-finite values (stat_bin).
rms_bar <- ggplot(df) +
geom_histogram(mapping = aes(x = rms), fill = "steelblue") +
theme_minimal()
rms_bar + ggtitle("Rms Value (root-mean-squared residual of solution)")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 331 rows containing non-finite values (stat_bin).
net_bar <- ggplot(df) +
geom_bar(mapping = aes(x = net), fill = "steelblue") +
theme_minimal()
net_bar + ggtitle("Net Value")
type_bar <- ggplot(df) +
geom_bar(mapping = aes(x = type), fill = "steelblue") +
theme_minimal()
type_bar + ggtitle("Type of data entry")
status_bar <- ggplot(df) +
geom_bar(mapping = aes(x = status), fill = "steelblue") +
theme_minimal()
status_bar + ggtitle("Status of data entry")
locationSource_bar <- ggplot(df) +
geom_bar(mapping = aes(x = locationSource), fill = "steelblue") +
theme_minimal()
locationSource_bar + ggtitle("Location: data source")
magSourceSource_bar <- ggplot(df) +
geom_bar(mapping = aes(x = magSource), fill = "steelblue") +
theme_minimal()
magSourceSource_bar + ggtitle("Magnitude: data source")
horizontalError_hist <- ggplot(df) +
geom_histogram(mapping = aes(x = horizontalError), fill = "steelblue") +
theme_minimal()
horizontalError_hist + ggtitle("Horizontal Error")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 630 rows containing non-finite values (stat_bin).
depthError_hist <- ggplot(df) +
geom_histogram(mapping = aes(x = depthError), fill = "steelblue") +
theme_minimal()
depthError_hist + ggtitle("Depth Error ")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 447 rows containing non-finite values (stat_bin).
magError_hist <- ggplot(df) +
geom_histogram(mapping = aes(x = magError), fill = "steelblue") +
theme_minimal()
magError_hist + ggtitle("Magnitude Error")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 617 rows containing non-finite values (stat_bin).
magNst_hist <- ggplot(df) +
geom_histogram(mapping = aes(x = magNst), fill = "steelblue") +
theme_minimal()
magNst_hist + ggtitle("Number of stations that reported the magnitude ")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 403 rows containing non-finite values (stat_bin).
We start by exploring the most obvious questions: where do most earthquakes happen, including their magnitude as well
ggplot(data=df) +
geom_point(mapping = aes(x = longitude, y = latitude, color=mag))
As we can see, there is a big cluster where most earthquakes happen, with a smaller cluster that is noticeable, followed by sparsely distributed cases Let’s now plot them on a map for a better visualization. Based on common knowledge of earthquakes in Romania, can you guess where the main epicenter will be?
(we are using stamen source for the map, but the commented code also uses google maps. It requires an API key, though)
romania_map <- get_map("Brasov", maptype = "roadmap", zoom = 6)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=Brasov&zoom=6&size=640x640&scale=2&maptype=roadmap&language=en-EN&key=xxx-e9WaBDXrHnFl2Tud9HT6DdUPuvs
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Brasov&key=xxx-e9WaBDXrHnFl2Tud9HT6DdUPuvs
romania_box <- c(20, 43, 30, 48.5)
romania_map2 <- get_map(location = romania_box, source = "stamen", maptype = "toner", zoom = 7)
## Source : http://tile.stamen.com/terrain/7/71/44.png
## Source : http://tile.stamen.com/terrain/7/72/44.png
## Source : http://tile.stamen.com/terrain/7/73/44.png
## Source : http://tile.stamen.com/terrain/7/74/44.png
## Source : http://tile.stamen.com/terrain/7/71/45.png
## Source : http://tile.stamen.com/terrain/7/72/45.png
## Source : http://tile.stamen.com/terrain/7/73/45.png
## Source : http://tile.stamen.com/terrain/7/74/45.png
## Source : http://tile.stamen.com/terrain/7/71/46.png
## Source : http://tile.stamen.com/terrain/7/72/46.png
## Source : http://tile.stamen.com/terrain/7/73/46.png
## Source : http://tile.stamen.com/terrain/7/74/46.png
## Source : http://tile.stamen.com/terrain/7/71/47.png
## Source : http://tile.stamen.com/terrain/7/72/47.png
## Source : http://tile.stamen.com/terrain/7/73/47.png
## Source : http://tile.stamen.com/terrain/7/74/47.png
#ggmap(romania_map) +
# geom_point(data = df, aes(x = longitude, y = latitude), color = "steelblue", size = 1)
ggmap(romania_map2) +
geom_point(data = df, aes(x = longitude, y = latitude, color = mag), size = 1)
As we can see, most earthquakes are in Vrancea County, as expected.
Let’s also make an interactive map to play around it
We have colored each pin by earthquake classification type (based on magnitude) and have grouped them in clusters (and an extra without clusters just for showcase).
green: minor blue: light pink: moderate orange: strong red: major black: great
As we can see, most of the recorder earthquakes are either minor or light. As we go up in magnitude, the number of earthquakes decreases.
getColor <- function(quakes) {
sapply(quakes$mag, function(mag) {
if (mag < 4) {
"green"
} else if (mag < 5) {
"blue"
} else if (mag < 6) {
"pink"
} else if (mag < 7) {
"orange"
} else if (mag < 8) {
"red"
} else {
"black"
} })
}
icons <- awesomeIcons(
icon = 'ios-close',
iconColor = 'black',
library = 'ion',
markerColor = getColor(df)
)
leaflet(data = df) %>%
addTiles() %>%
addAwesomeMarkers(~longitude, ~latitude, popup = ~as.character(year), label = ~as.character(mag), icon=icons, clusterOptions = markerClusterOptions())
leaflet(data = df) %>%
addTiles() %>%
addAwesomeMarkers(~longitude, ~latitude, popup = ~as.character(year), label = ~as.character(mag), icon=icons)
Let’s start exploring covariance, as well - starting with the number of earthquakes per magnitude, each year. We started by using a barplot and a freqpoly, but it is quite hard to get useful information due to different number of earthquakes in each year. Thus, we switched to density plot which takes that into consideration, as well.
ggplot(data=df) +
geom_bar(mapping = aes(x = mag, fill = year))
## Warning: position_stack requires non-overlapping x intervals
ggplot(data=df) +
geom_freqpoly(mapping = aes(x = mag, color = year))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data=df) +
geom_density(mapping = aes(x = mag, color = year))
Due to the number of years, it is still hard to read the graphs. Let’s use a facet_wrap for a better understanding.
See? Way better.
ggplot(data=df) +
geom_bar(mapping = aes(x = mag, fill = year)) +
facet_wrap(~ year) +
theme_minimal()
ggplot(data=df) +
geom_freqpoly(mapping = aes(x = mag, color = year)) +
facet_wrap(~ year) +
theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
ggplot(data=df) +
geom_density(mapping = aes(x = mag, color = year)) +
facet_wrap(~ year) +
theme_minimal()
There is obviously a correlation between the location and the number of earthquakes. Let’s see if we can find other correlations, as well. How about between the depth and the magnitude
ggplot(data=df) +
geom_point(mapping = aes(x = depth, y = mag, color = year))
ggplot(data=df) +
geom_point(mapping = aes(x = depth, y = mag, color = year)) +
facet_wrap(~ year) +
theme_minimal()
ggplot(data=df) +
geom_line(mapping = aes(x = depth, y = mag, color = year)) +
facet_wrap(~ year) +
theme_minimal()
Ok, let’s switch to 3D plots as it seems we need to take into account more than 2 columns.
Starting with longitude, latitude and magnitude.
longlatmag <- plot_ly(df, x = ~longitude, y = ~latitude, z = ~mag,
marker = list(color = ~mag, colorscale = c('#FFE1A1', '#683531'), showscale = TRUE))
longlatmag <- longlatmag %>% add_markers()
longlatmag <- longlatmag %>% layout(scene = list(xaxis = list(title = 'Longitude'),
yaxis = list(title = 'Latitude'),
zaxis = list(title = 'Magnitude')),
annotations = list(
x = 1.13,
y = 1.05,
text = 'Magnitude',
xref = 'mag',
yref = 'mag',
showarrow = FALSE
))
longlatmag
Ok, now let’s take a look at the latitude, longitude and depth.
It is interesting to see that while there are 2 obvious clusters regarding the location, one cluster (Vrancea one) definitely has earthquakes with more depth. We have also colored them based on magnitude.
longlatdepth <- plot_ly(df, x = ~longitude, y = ~latitude, z = ~depth,
marker = list(color = ~mag, colorscale = c('#FFE1A1', '#683531'), showscale = TRUE))
longlatdepth <- longlatdepth %>% add_markers()
longlatdepth <- longlatdepth %>% layout(scene = list(xaxis = list(title = 'Longitude'),
yaxis = list(title = 'Latitude'),
zaxis = list(title = 'Depth')),
annotations = list(
x = 1.13,
y = 1.05,
text = 'Magnitude',
xref = 'mag',
yref = 'mag',
showarrow = FALSE
))
longlatdepth
Let’s do some more 3D exploring. How do the errors look? And let’s color them based on magnitude to see if the errors are correlated with the recorder magnitude
longlatdepth <- plot_ly(df, x = ~horizontalError, y = ~depthError, z = ~magError,
marker = list(color = ~mag, colorscale = c('#FFE1A1', '#683531'), showscale = TRUE))
longlatdepth <- longlatdepth %>% add_markers()
longlatdepth <- longlatdepth %>% layout(scene = list(xaxis = list(title = 'Horizontal Error'),
yaxis = list(title = 'Depth Error'),
zaxis = list(title = 'Magnitude Error')),
annotations = list(
x = 1.13,
y = 1.05,
text = 'Magnitude',
xref = 'mag',
yref = 'mag',
showarrow = FALSE
))
longlatdepth
## Warning: Ignoring 635 observations
Ok, now let’s take a look at some animations. (be patient when loading them) Not extremely useful, but super cool to look at!
ggplot(data = df) +
geom_point(mapping = aes(x = longitude, y = latitude, color = mag)) +
labs(title = 'Year: {frame_time}', x = 'Longitude', y = 'Latitude') +
transition_time(yearInt) +
ease_aes('linear')
ggplot(data = df) +
geom_bar(mapping = aes(x = mag), fill = 'steelblue') +
labs(title = 'Year: {frame_time}', x = 'Mag') +
transition_time(yearInt) +
ease_aes('linear')
year_mag_mean <- aggregate(list("MagMean"=df[, 5]), list("Year"=df$year), mean)
year_df <- data.frame(year_mag_mean$Year, year_mag_mean$MagMean)
time <- 0
mag <- 0
for (index in 1:nrow(year_df)) {
time[index] <- year_df[index,1]
mag[index] <- year_df[index,2]
}
data_matrix <- matrix(mag,
nrow =length(time),
dimnames = list(time, "data"))
data_ts <- ts(data_matrix, start = 1990)
ts.plot(data_ts,
main = "Romania Earthquake - Magnitude mean per year",
xlab="Year",
ylab="Magnitude mean",
col="#3c4e66",
lwd=1)
acf2(data_ts)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## ACF 0.57 0.31 0.13 -0.14 -0.12 -0.20 -0.19
## PACF 0.57 -0.02 -0.06 -0.28 0.13 -0.18 0.04
my_model <- arima(data_ts, order=c(1,1,1))
stdres <- my_model$residuals/sqrt(my_model$sigma2)
plot(my_model$residuals, main = "Chosen model - ARIMA(1, 1, 1) - Residuals")
qqnorm(stdres, main = "Chosen model - ARIMA(1, 1, 1) - Normal Q-Q Plot of Std Residuals")
shapiro.test(residuals(my_model))
##
## Shapiro-Wilk normality test
##
## data: residuals(my_model)
## W = 0.90685, p-value = 0.01662
my_forecast = predict(arima(data_ts ,order=c(1,1,1)), n.ahead=5)
ts.plot(data_ts,
my_forecast$pred,
main="Romania Earthquake - Magniture mean per year prediction",
col=1:2,
xlab="Year",
ylab="Magnitude mean",
lwd=1)
### CHECK FOR STATIONARITY AND MAKE
### DATA STATIONARY
### Magnitude time series
get_ts_matrix <- function(ts_magnitude) {
time <- 0
mag <- 0
for (index in 1:nrow(ts_magnitude)) {
time[index] <- ts_magnitude[index,1]
mag[index] <- ts_magnitude[index,2]
}
mag_matrix <- matrix(ts_magnitude$df.mag,
nrow =length(ts_magnitude$df.timeYear),
dimnames = list(ts_magnitude$df.timeYear, "data"))
return(mag_matrix)
}
ts_magnitude <- data.frame(df$timeYear, df$mag)
ts_magnitude_plot <- ggplot(ts_magnitude, aes(x=df.timeYear, y=df.mag)) +
ggtitle("Magnitude time series") +
geom_line() +
xlab("")
ts_magnitude_plot
mag_matrix <- get_ts_matrix(ts_magnitude)
acf2(ts(mag_matrix))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
## ACF 0.32 0.29 0.32 0.32 0.30 0.29 0.24 0.29 0.26 0.29 0.29 0.29 0.29 0.31
## PACF 0.32 0.21 0.21 0.17 0.13 0.10 0.03 0.09 0.05 0.09 0.08 0.08 0.07 0.08
## [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
## ACF 0.27 0.28 0.28 0.30 0.23 0.25 0.21 0.19 0.24 0.23 0.24 0.23
## PACF 0.03 0.03 0.04 0.06 -0.03 0.01 -0.04 -0.06 0.03 0.02 0.04 0.00
## [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF 0.20 0.22 0.21 0.17 0.2 0.17 0.20 0.21 0.16 0.20
## PACF -0.02 -0.01 -0.02 -0.05 0.0 -0.02 0.03 0.04 -0.02 0.03
### First diff time series
ts_magnitude$df.mag<- c(0, diff(df$mag))
ts_diff_magnitude_plot <- ggplot(ts_magnitude, aes(x=df.timeYear, y=df.mag)) +
ggtitle("First diff - Magnitude time series") +
geom_line() +
xlab("")
ts_diff_magnitude_plot
diff_mag_matrix <- get_ts_matrix(ts_magnitude)
acf2(ts(diff_mag_matrix))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## ACF -0.48 -0.04 0.02 0.01 -0.01 0.03 -0.07 0.05 -0.04 0.02 0.0 0.00
## PACF -0.48 -0.36 -0.26 -0.19 -0.15 -0.07 -0.13 -0.08 -0.11 -0.10 -0.1 -0.09
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF -0.01 0.04 -0.03 0.00 -0.01 0.06 -0.06 0.05 -0.02 -0.05 0.04 -0.01
## PACF -0.09 -0.04 -0.04 -0.05 -0.07 0.02 -0.02 0.04 0.05 -0.04 -0.02 -0.05
## [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF 0.02 0.00 -0.03 0.02 0.02 -0.05 0.04 -0.04 0.02 0.04 -0.06 0.02
## PACF -0.01 0.01 0.00 0.01 0.04 -0.01 0.02 -0.04 -0.05 0.01 -0.04 -0.04
### Second diff time series
### ts_magnitude$df.mag<- c(0, diff(ts_magnitude$df.mag))
ts_second_diff_magnitude_plot <- ggplot(ts_magnitude, aes(x=df.timeYear, y=df.mag)) +
ggtitle("Second diff - Magnitude time series") +
geom_line() +
xlab("")
ts_second_diff_magnitude_plot
second_diff_mag_matrix <- get_ts_matrix(ts_magnitude)
acf2(ts(second_diff_mag_matrix))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12]
## ACF -0.48 -0.04 0.02 0.01 -0.01 0.03 -0.07 0.05 -0.04 0.02 0.0 0.00
## PACF -0.48 -0.36 -0.26 -0.19 -0.15 -0.07 -0.13 -0.08 -0.11 -0.10 -0.1 -0.09
## [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF -0.01 0.04 -0.03 0.00 -0.01 0.06 -0.06 0.05 -0.02 -0.05 0.04 -0.01
## PACF -0.09 -0.04 -0.04 -0.05 -0.07 0.02 -0.02 0.04 0.05 -0.04 -0.02 -0.05
## [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF 0.02 0.00 -0.03 0.02 0.02 -0.05 0.04 -0.04 0.02 0.04 -0.06 0.02
## PACF -0.01 0.01 0.00 0.01 0.04 -0.01 0.02 -0.04 -0.05 0.01 -0.04 -0.04
### ACF indicates q
### PACF indicates p
### arima(p,diff,q)
data <- ts(diff_mag_matrix)
### ANALYSING TIME SERIES DATA
### MODELLING, FORECASTING AND DATA FORMATTING IN R
### https://ourcodingclub.github.io/tutorials/time/
#
# df$date <- as.Date(df$time, format = "%d/%b/%Y-%u")
# (time_plot <- ggplot(df, aes(x = date, y = mag)) +
# geom_line() +
# scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
# theme_classic())
#
# (decomp_2 <- ggplot(df, aes(x = date, y = mag)) +
# geom_line() +
# geom_smooth(method = "loess", se = FALSE, span = 0.6) +
# theme_classic())
#
# # Extract month and year and store in separate columns
# df$year <- format(df$date, format = "%Y")
# df$month_num <- format(df$date, format = "%m")
#
# df_ts <- ts(df$mag, start = 1990, end = 2017, freq = 1) # Specify start and end year
#
# df_model <- window(x = df_ts, start = c(1990), end = c(2010))
# df_test <- window(x = df_ts, start = c(2010))
Earthquakes in Romania are a major cause of disaster and sometimes death. It is important to recognize patterns in their behavior as this can help us minimize the caused damage. We did identify some known patterns, such as the 2 important clusters and analyzed the difference between them (such as, the earthquakes in Vrancea county are deeper, more often and more devastating).
Predicting them with time series is next to impossible, as there are many factors. Either way, if we do look at them, there is a clear sesonality to it. Based on that, we are expecting a major earthquake in the following years.